% Main Script: run_mc_R01_DGP01.m
%   Description:
%       This script produces the results of a Monte Carlo simulation, where the DGP is set as a
%       simple static factor model and number of factors being 1, with various settings for the
%       number of units (N) and the number of time periods (T)
%   Output:
%       Table 1: Simulation results for the experiment in Section 5.1, N = 100, R = 1
%       Table 6: Simulation results for the experiment in Section 5.1, N = 50, R = 1
%       Table 7: Simulation results for the experiment in Section 5.1, N = 300, R = 1
%       Figure 1: Finite sample distributions of the LS and the debiased estimators,
%                   N = 100, T = 50, R = 1

% ========================================
% Setting simulation environment and parameters
% ========================================
clear all;
% Randomization seed 
rng(12345)
% Number of cores being used in the parallel computation
parpool('local', 8)
% Number of replications
params.reps = 5000;
% True parameter
params.beta0 = 0;
% Number of factors
params.R0 = 1;
% Save file directory
params.folder_name = 'matlogs/R01_DGP01';
% Number of units and number of time periods
arr_N = [50 100 300];  
arr_T = [20 50 100 300];
% Strength of factors
arr_kappa = [0:0.05:0.25 0.50 1.00]';

% ========================================
% Loop over all different combinations of N, T, and kappa
% ========================================
% Prepare arrays for outputs
res = cell(length(arr_N), length(arr_T), size(arr_kappa,1));
n_cases = length(arr_N) * length(arr_T) * size(arr_kappa,1);
i_case = 0;
for i_N = 1:length(arr_N)
    for i_T = 1:length(arr_T)
        for i_kappa = 1:size(arr_kappa,1)
            ticID_this_design = tic;
            params.N = arr_N(i_N);
            params.T = arr_T(i_T);
            params.kappa = arr_kappa(i_kappa,:);
            % Run the Monte Carlo simulation for the current parameters
            out = mc_weak_factors(params);
            res{i_N, i_T, i_kappa}.params = params;
            res{i_N, i_T, i_kappa}.LS.stats = out.LS.stats;
            res{i_N, i_T, i_kappa}.H.stats = out.H.stats;
            i_case = i_case + 1;
            fprintf(['Design %i/%i (N,T,kappa)=(%i, %i,' repmat(' %1.2f', 1, length(params.kappa)) ') took %1.2fs\n'], ...
                i_case, n_cases, params.N, params.T, params.kappa, toc(ticID_this_design));
        end
    end
end

% ========================================
% Construct oracle confidence intervals
% ========================================
% These oracle CIs are based on unknown design-specific least favorable critical values
LF_cv_LS = cell(size(res,1),size(res,2));
LF_cv = cell(size(res,1),size(res,2));
for i_N=1:size(res,1)
    for i_T=1:size(res,2)        
        for i_kappa=1:size(res,3)
            res_tmp = res{i_N,i_T,i_kappa};
            cv_LS_tmp(:,:,:,i_kappa) = res_tmp.LS.stats.cv;
            cv_tmp(:,:,i_kappa) = res_tmp.H.stats.cv;
        end
        % Maximize over kappa dimension
        LF_cv_LS{i_N,i_T} = max(cv_LS_tmp,[],4);
        LF_cv{i_N,i_T} = max(cv_tmp,[],3);
    end   
end
% Convert results to arrays
for i=1:numel(LF_cv_LS)
    LF_cv_LS_arr(:,:,:,i) = LF_cv_LS{i}; 
    LF_cv_arr(:,:,i) = LF_cv{i};
end
% Maximum across (N, T)
LF_cv_LS_max = max(LF_cv_LS_arr,[],4);
LF_cv_max = max(LF_cv_arr,[],3);

% ========================================
% Save the results
% ========================================
save(sprintf('%s/MC_merged_w_LFCV',params.folder_name),'res','LF_cv_LS','LF_cv_LS_max','LF_cv','LF_cv_max')
% Close parallel computation
if isunix
    delete(gcp('nocreate'));
end

% ========================================
% Output Table 1 & 6 & 7 as CSV files
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
% File list
folder_name = 'R01_DGP01';
fileinfo = dir(['matlogs/' folder_name '/*.mat']);
f_names = {fileinfo.name};
f_name_w_LFCV = f_names(contains(f_names,'w_LFCV'));
load(['matlogs/' folder_name '/' f_name_w_LFCV{:}]);
% Column names for output table
columnLabels = {'kappa', 'T'};
basic_estimators = {'LS','H'};
for i_b=1:length(basic_estimators)
    columnLabels = [columnLabels, {[basic_estimators{i_b} ' bias']}, {[basic_estimators{i_b} ' std']}, {[basic_estimators{i_b} ' rmse']}, ...
        {[basic_estimators{i_b} ' size']}, {[basic_estimators{i_b} ' len']}, {[basic_estimators{i_b} ' LFCV']}];
end
% Note that Table 1 is for N = 100,
%           Table 2 is for N = 50,
%           Table 3 is for N = 300
for i_N=1:length(arr_N)
    N = arr_N(i_N);
    tab_arr = [];
    for i_T=1:length(arr_T)
        T = arr_T(i_T);
        f_names_dgp = f_names(contains(f_names,sprintf('N%i_T%i',N,T)));
        for i_kappa = 1:length(arr_kappa)
            load(['matlogs/' folder_name '/' f_names_dgp{i_kappa}])
            if round(out.params.kappa,2) == round(arr_kappa(i_kappa),2)
                % LS.bias and and LS.rmse each contains three components, which are under
                %   (i) no correction, (ii) correcting bcorr2 & bcorr3, (iii) correcting bcorr1 &
                %       bcorr2 & bcorr3 in LS_factor.m
                % LS.std contains three components, which are under error assumption of
                %   (i) homoscedasticity, (ii) heteroscedasticity, (iii) serial correlation
                % LS.stats.rp and LS.stats.length are 3*3*3 arrays, where (2,1,2) represents
                %   (i) selecting alpha = 0.05 from [0.10 0.05 0.01]
                %   (ii) selecting beta estimate with no correction
                %       from [no_correction correcting_2_and_3 correcting_1_and_2_and_3]
                %   (iii) selecting se with heteroscedasticity assumption
                %       from [homoscedasticity  heteroscedasticity  serial_correlation]
                % H.stats elements all only have one component
                tab_arr = [tab_arr; ...
                        out.params.kappa, T, ...
                        out.LS.stats.bias(1), out.LS.stats.std(1), out.LS.stats.rmse(1), out.LS.stats.rp(2,1,2)*100, out.LS.stats.length(2,1,2), mean(2*out.LS.se(:,1,2)*LF_cv_LS{i_N,i_T}(2,1,2)),  ...                        
                        out.H.stats.bias(1), out.H.stats.std(1), out.H.stats.rmse(1), out.H.stats.size(1)*100, out.H.stats.length(1), mean(2*out.H.se(:,1)*LF_cv{i_N,i_T}(2,1))];
            end
        end
    end
    % Convert array to table
    if N == 100
        tab1 = array2table(tab_arr, 'VariableNames', columnLabels);
    elseif N == 50
        tab6 = array2table(tab_arr, 'VariableNames', columnLabels);
    elseif N == 300
        tab7 = array2table(tab_arr, 'VariableNames', columnLabels);
    end
end
% Output table
writetable(tab1, 'tables/table1.csv');
writetable(tab6, 'tables/table6.csv');
writetable(tab7, 'tables/table7.csv');

% ========================================
% Output Table 9 as CSV file
% ========================================
if ~isfolder('tables')
    mkdir('tables')
end
% File list
folder_name = 'R01_DGP01';
fileinfo = dir(['matlogs/' folder_name '/*.mat']);
f_names = {fileinfo.name};
f_name_w_LFCV = f_names(contains(f_names,'w_LFCV'));
load(['matlogs/' folder_name '/' f_name_w_LFCV{:}]);
% Column names for output table
columnLabels = {'kappa', 'N', 'T'};
basic_estimators = {'LS','H'};
for i_b=1:length(basic_estimators)
    columnLabels = [columnLabels, {[basic_estimators{i_b} ' size_s']}, {[basic_estimators{i_b} ' len_s']},];
end
tab_arr = [];
select_arr_N = [100 300];
select_arr_T = [100 300];
for i_N=1:length(select_arr_N)
    N = select_arr_N(i_N);
    for i_T=1:length(select_arr_T)
        T = select_arr_T(i_T);
        if (N == 100 && T == 100) || (N == 300 && T == 100) || (N == 300 && T == 300)
            f_names_dgp = f_names(contains(f_names,sprintf('N%i_T%i',N,T)));
            for i_kappa = 1:length(arr_kappa)
                load(['matlogs/' folder_name '/' f_names_dgp{i_kappa}])
                if round(out.params.kappa,2) == round(arr_kappa(i_kappa),2)
                    % LS.stats.rp and LS.stats.length are 3*3*3 arrays, where (2,1,2) represents
                    %   (i) selecting alpha = 0.05 from [0.10 0.05 0.01]
                    %   (ii) selecting beta estimate with no correction
                    %       from [no_correction correcting_2_and_3 correcting_1_and_2_and_3]
                    %   (iii) selecting se with heteroscedasticity assumption
                    %       from [homoscedasticity  heteroscedasticity  serial_correlation]
                    % H.stats elements all only have one component
                    tab_arr = [tab_arr; ...
                            out.params.kappa, N, T, ...
                            out.LS.stats.rp(2,1,2)*100, out.LS.stats.length(2,1,2), ...
                            out.H.stats.size_s(1)*100, out.H.stats.length_s(1)];
                end
            end
        end
    end
end
tab9 = array2table(tab_arr, 'VariableNames', columnLabels);
% Output table
writetable(tab9, 'tables/table9.csv');

% ========================================
% Output Figure 1 as PDF files
% ========================================
folder_name = 'R01_DGP01';
% Only plot the selected setting
R = 1;
N = 100;
T = 50;
arr_kappa = [0 0.1 0.2 1]';
fileinfo = dir(['matlogs/' folder_name '/*.mat']);
f_names = {fileinfo.name};
f_names = f_names(contains(f_names,sprintf('N%i_T%i',N,T)));
if ~isfolder('figures')
    mkdir('figures')
end
% Loop over selected kappa
for i_kappa=1:length(arr_kappa)
    kappa = arr_kappa(i_kappa);
    f_names_dgp = f_names(contains(f_names,sprintf('kappa%03d', kappa*100)));
    load(['matlogs/' folder_name '/' f_names_dgp{:}])
    % Plot joint histograms
    figure;
    histogram(out.LS.beta(:,1),'normalization','pdf');
    hold on;
    histogram(out.H.beta(:,1),'normalization','pdf');
    legend('LS','Debiased');
    % Output figure as PDF file
    set(gcf,'PaperOrientation','portrait');
    set(gcf,'PaperSize',[8 6]); % Height and width are set to be 8 and 6
    set(gcf,'PaperPosition',[0+0.05, 0+0.05, 8-2*0.05, 6-2*0.05]); % Margin is set to be 0.05
    saveas(gcf, ['figures/' sprintf('N%i_T%i_R%i_kappa%03d',out.params.N,out.params.T,out.params.R0,round(out.params.kappa*100)) '.pdf']);
    close all
end